* HPACC LMIC Pt 1: restrict to the analysis variables and overwrite the South
* Africa HbA1c values with hba1c_p_new (expected to arrive from HPACC_DUA_SA.dta)
use "HPACC_LMIC_Pt1_2024-11-26", clear

* Variables carried forward from this source file
local pt1_keep Country countryGDPclass p_id stratum stratum_num year psu psu_num ///
    svy w3 age sex pregnant educat3 bmi hbg_new fbg_new fast_new hba1c_p ///
    sbp_avg dbp_avg statin chol_med chol_med2 dia_med_new insulin_new
keep `pt1_keep'

sort Country p_id

* Remove the survey stored under the name "South Africa DHS"
drop if Country == "South Africa DHS"

* Match on Country and p_id; nogenerate suppresses the _merge indicator
merge m:1 Country p_id using "HPACC_DUA_SA.dta", nogenerate

* Only rows labelled "South Africa" take the replacement HbA1c
replace hba1c_p = hba1c_p_new if Country == "South Africa"
drop hba1c_p_new

save "HPACC_LMIC_Pt1_keyvars.dta", replace

* HPACC LMIC Pt 2: use LASI/LASI-DAD as the India survey and overwrite its
* HbA1c, BMI, blood pressure and weight with the LASI-DAD harmonized values
use "HPACC_LMICs_Pt2_2024-11-26", clear
replace fbg_new = ex_fbg if Country == "Argentina"

* India survey team suggests not using the DHS/NFHS and instead using LASI/LASI-DAD:
* remove the rows currently named "India", then give the LASI rows that name
drop if Country == "India"
replace Country = "India" if Country == "IndiaLASI"

* prim_key is the LASI-DAD unique identifier; it is a copy of p_id
clonevar prim_key = p_id

* The merge key is prim_key alone, so it is evaluated for every country's rows;
* the merged values are only applied to India below
merge m:1 prim_key using "H_LASI_DAD_b1.dta", ///
    keepusing(r1hba1c r1labwgt r1hmbmi r1hsysto r1hdiasto) nogenerate

mi unset, asis

* HbA1c: LASI-DAD value replaces the existing one for India (missing included)
replace hba1c_p = r1hba1c if Country == "India"
recast float hba1c_p
format hba1c_p %9.1f

* Give the LASI-DAD measurements descriptive names
rename (r1hmbmi r1hsysto r1hdiasto) (bmi_lasi_dad sbp_avg_lasi_dad dbp_avg_lasi_dad)

* BMI: values above 100 are treated as missing codes, then non-missing values
* overwrite bmi for India
replace bmi_lasi_dad = . if bmi_lasi_dad > 100
replace bmi = bmi_lasi_dad if Country == "India" & bmi_lasi_dad != .

* Systolic and diastolic BP: same pattern, with a cut-off of 1100
* NOTE(review): 1100 is far above any plausible reading - confirm it is not
* meant to be 1000; out-of-range values are removed again later in the file
foreach bp in sbp_avg dbp_avg {
    replace `bp'_lasi_dad = . if `bp'_lasi_dad > 1100
    replace `bp' = `bp'_lasi_dad if Country == "India" & `bp'_lasi_dad != .
}

* Weight: r1labwgt replaces w3 for India
replace w3 = r1labwgt if Country == "India"

local pt2_keep Country countryGDPclass p_id stratum stratum_num year psu psu_num ///
    svy w3 age sex pregnant educat3 bmi hbg_new fbg_new fast_new hba1c_p ///
    sbp_avg dbp_avg statin chol_med chol_med2 dia_med_new insulin_new
keep `pt2_keep'

sort Country p_id
save "HPACC_LMIC_Pt2_keyvars.dta", replace

* HPACC high-income-country file: take England's age from ex_age, then trim
* to the analysis variables
use "HPACC_main_HIC_2024-11-26", clear
replace age = ex_age if Country == "England"

local hic_keep Country countryGDPclass p_id stratum stratum_num year psu psu_num ///
    svy w3 age sex pregnant educat3 bmi hbg_new fbg_new fast_new hba1c_p ///
    sbp_avg dbp_avg statin chol_med chol_med2 dia_med_new insulin_new
keep `hic_keep'

sort Country p_id
save "HPACC_main_HIC_keyvars.dta", replace

* HPACC DUA file: add the German statin item, set survey-year labels, drop
* South Africa, and trim to the analysis variables
use "HPACC_DUA_2024-11-26", clear

* Merge key for the German statin file: germany is 1 on German rows and a
* row-specific value (1000000 + row number) everywhere else
gen germany = 1000000 + _n
replace germany = 1 if Country == "Germany"

merge 1:1 germany p_id using "DEGS1_0285_v4 - with p_id.dta", keepusing(Statin_if)

* Statin_if code 2 becomes 0, then the item replaces statin for Germany
recode Statin_if (2=0)
replace statin = Statin_if if Country == "Germany"

* year starts as ex_year; the surveys below get an explicit fieldwork period
gen year = ex_year
local dua_ctry  Germany Greece Malta Pakistan Singapore Venezuela
local dua_years 2008-2011 2013-2016 2014-2016 2016-2017 2014-2015 2014-2017
local n_dua : word count `dua_ctry'
forvalues i = 1/`n_dua' {
    local this_ctry : word `i' of `dua_ctry'
    local this_year : word `i' of `dua_years'
    replace year = "`this_year'" if Country == "`this_ctry'"
}

drop if Country == "South Africa"

* This list has no svy variable, unlike the other HPACC source files; the
* _merge indicator is discarded here because it is not listed
local dua_keep Country countryGDPclass p_id stratum stratum_num year psu psu_num ///
    w3 age sex pregnant educat3 bmi hbg_new fbg_new fast_new hba1c_p ///
    sbp_avg dbp_avg statin chol_med chol_med2 dia_med_new insulin_new
keep `dua_keep'

sort Country p_id
save "HPACC_DUA_keyvars.dta", replace

* Aruba STEPS: rename and derive variables so the file matches the HPACC layout
use "Aruba_STEPS_2023_14Nov2024", clear
rename wstep3 w3

* Three-level education:
*   0 = no formal schooling        (educat 0)
*   1 = primary school             (educat 1 or 2)
*   2 = secondary school or above  (educat 3 or 4)
* any other educat value gives missing
gen educat3 = cond(educat == 0, 0, ///
    cond(inlist(educat, 1, 2), 1, ///
    cond(inlist(educat, 3, 4), 2, .)))

* Copy each item into its "_new" counterpart used by the rest of the pipeline
foreach item in hbg fbg fast dia_med insulin {
    gen `item'_new = `item'
}

* Numeric versions of the design identifiers
destring stratum, gen(stratum_num)
destring psu, gen(psu_num)

local aruba_keep Country p_id stratum stratum_num year psu psu_num svy w3 age sex ///
    pregnant educat3 bmi hbg_new fbg_new fast_new hba1c_p sbp_avg dbp_avg ///
    statin chol_med chol_med2 dia_med_new insulin_new
keep `aruba_keep'

sort Country p_id
save "Aruba_STEPS_2023.dta", replace

* Armenia STEPS: add the country name and design identifiers, then trim
use "armenia steps 2023.02.22.dta", clear
gen Country = "Armenia"

* stratum_pre becomes stratum; stratum_num starts as a plain copy of it
rename stratum_pre stratum
gen stratum_num = stratum
destring psu, gen(psu_num)

* No hba1c_p or chol_med in this list
local armenia_keep Country p_id stratum stratum_num year psu psu_num svy w3 age sex ///
    pregnant educat3 bmi hbg_new fbg_new fast_new sbp_avg dbp_avg statin ///
    chol_med2 dia_med_new insulin_new
keep `armenia_keep'

sort Country p_id
save "Armenia_STEPS_2023.dta", replace

* Stack the six prepared files, starting from LMIC Pt 1.
* force lets append proceed when a variable is string in one file and numeric
* in another.
use "HPACC_LMIC_Pt1_keyvars.dta", clear
foreach part in "HPACC_LMIC_Pt2_keyvars.dta" "HPACC_main_HIC_keyvars.dta" "HPACC_DUA_keyvars.dta" "Aruba_STEPS_2023.dta" "Armenia_STEPS_2023.dta" {
    append using "`part'", force
}
save "HPACC_Appended_original.dta", replace

* Remove ineligible surveys, then shorten two survey-specific country names
sort Country

* Individual survey rounds, identified by country and year label
drop if (Country == "China" & year == "2009") | /// prior to 2010
    (Country == "Gambia" & year == "2010") | /// no diabetes biomarker
    (Country == "Ghana" & year == "2007/8") | /// no diabetes biomarker
    (Country == "Grenada" & year == "2009-11") | /// no diabetes biomarker
    (Country == "Niger" & year == "2007") | /// prior to 2010
    (Country == "Russian Federation" & year == "2007/8") | /// 2008, no diabetes biomarker
    (Country == "Libya" & year == "2009") | /// 2009 only
    (Country == "Sierra Leone" & year == "2009") | /// 2009 only, no diabetes
    (Country == "Tonga" & year == "2017") // no diabetes biomarker

* Whole countries:
*   Albania, Nigeria - no fasting variable
*   Belize           - prior to 2010
*   Egypt            - no diabetes biomarker
*   Tokelau          - missing educat3
drop if inlist(Country, "Albania", "Nigeria", "Belize", "Egypt", "Tokelau")

* These renames run after the drops, so the China/2009 rule above only saw
* rows that were already named "China"
replace Country = "Chile" if Country == "Chile ENS"
replace Country = "China" if Country == "China CHARLS"

save "HPACC_Appended_original.dta", replace

////////////////////////////////////////////////////////////////////////////////
/////////////////// GENERAL DATASET CLEANING AND PREPARATION ///////////////////
////////////////////////////////////////////////////////////////////////////////

* Clean missingness
* Several neighbouring numbers are listed for one code (for example 555555555
* and 555555584); presumably these are the same code after storage in a
* lower-precision type - confirm.
* Code meanings:
*   333333333 = no consent eligible
*   555555555 = not eligible
*   666666666 = variable not in the dataset
*   777777777 / 77 / .d = subject did not know
*   888888888 / 88 / .r = subject refused to answer
*   999999999 = missing as skip pattern

    * Diagnosis and medication items: "did not know" and skip-pattern codes
    * become 0; all other codes become missing
    foreach item of varlist hbg_new insulin_new dia_med_new statin chol_med chol_med2 {
        replace `item' = . if inlist(`item', 333333333, 555555555, 555555584, ///
            666666666, 666666688, 88, 855555554.7, 855555571.2, 888888888, 888888896, .r)
        replace `item' = 0 if inlist(`item', 777777777, 77, 777777792, .d, ///
            977777785.6, 977777776.8, 999999999, 1000000000)
    }

    * Blood pressure variables: every listed code becomes missing
    foreach item of varlist sbp* dbp* {
        replace `item' = . if inlist(`item', 555555555, 666666666, 666666688, ///
            777777777, 77, 777777792, .d, 855555554.7, 855555571.2, 888888888, 888888896, .r)
    }
		
* Income group for the two surveys added outside HPACC
replace countryGDPclass = 3 if Country == "Armenia"
replace countryGDPclass = 4 if Country == "Aruba"

* Survey year: start from the year label, then reduce each multi-year
* fieldwork period to a single year
clonevar survey_year = year
replace survey_year = "2017" if Country == "Algeria"

* The rules below test year (not survey_year), so they do not interact with
* each other; they would still override the Algeria value if its year label
* matched one of them
replace survey_year = "2010" if inlist(year, "2008-2010", "2008-2011")
replace survey_year = "2012" if year == "2011-2013"
replace survey_year = "2014" if inlist(year, "2013-2014", "2013-2015")
replace survey_year = "2015" if inlist(year, "2013-2016", "2014-2015", "2014-2016")
replace survey_year = "2016" if inlist(year, "2014-2017", "2015-2016", "2015-2017")
replace survey_year = "2017" if inlist(year, "2016-2017", "2015 - Mar2020")
replace survey_year = "2018" if inlist(year, "2017-2018", "2017-2019")
replace survey_year = "2019" if year == "2018-2019"
replace survey_year = "2020" if year == "2019-2020"

* Sex: shift the 0/1 coding to 1/2, blank anything above 2, attach labels
recode sex (0 = 1) (1 = 2)
replace sex = . if sex > 2
label define sex_label 1 "Men" 2 "Women", modify
label values sex sex_label

* Ten-year age bands for ages 30-69 (1 = 30-39 ... 4 = 60-69); missing outside
gen age_cat4 = .
forvalues band = 1/4 {
    replace age_cat4 = `band' if age >= 20 + 10*`band' & age < 30 + 10*`band'
}
label define agecat4_lbl 1 "30-39" 2 "40-49" 3 "50-59" 4 "60-69"
label values age_cat4 agecat4_lbl

* Ten-year age bands for ages 40-69 (1 = 40-49 ... 3 = 60-69); missing outside
gen age_cat3 = .
forvalues band = 1/3 {
    replace age_cat3 = `band' if age >= 30 + 10*`band' & age < 40 + 10*`band'
}
label define agecat3_lbl 1 "40-49" 2 "50-59" 3 "60-69"
label values age_cat3 agecat3_lbl

* BMI: values above 33333 are blanked first (this also turns extended missing
* values into plain missing), then four categories are assigned
replace bmi = . if bmi > 33333

* Missing bmi fails every "<" test (missing sorts above all numbers) and is
* excluded from the last category by the explicit bmi != . test
gen long bmi_cat4 = cond(bmi < 18.5, 0, ///
    cond(bmi < 25, 1, ///
    cond(bmi < 30, 2, ///
    cond(bmi != ., 3, .))))
label variable bmi_cat4 "BMI categories"
label define bmi_cat_label 0 "<18.5" 1 "18.5-<25" 2 "25-<30" 3 "≥30", modify
label values bmi_cat4 bmi_cat_label

* Generate regions
* First step: give each country a numeric sub-region code in a working
* variable. inlist() accepts at most nine string values to compare against,
* which is why long country lists are spread over several statements.
* NOTE(review): this section assigns codes 2-16 only; nothing receives code 1.
	local sr ncdrisc_subregioncat
	gen `sr' = .

	* Sub-Saharan Africa
		// East Africa -> 2
		replace `sr' = 2 if inlist(Country, ///
			"Burundi","Comoros","Djibouti","Eritrea","Ethiopia","Zanzibar","Kenya","Madagascar","Malawi")
		replace `sr' = 2 if inlist(Country, ///
			"Mauritius","Mozambique","Rwanda","Seychelles","Somalia","Sudan","Tanzania","Uganda","Zambia")

		// Southern Africa -> 3
		replace `sr' = 3 if inlist(Country, ///
			"Botswana","Lesotho","Namibia","South Africa","Eswatini","Zimbabwe")

		// West Africa -> 4
		replace `sr' = 4 if inlist(Country, ///
			"Benin","Burkina Faso","Cabo Verde","Cameroon","Chad","Cote d'Ivoire","Gambia","Ghana","Guinea")
		replace `sr' = 4 if inlist(Country, ///
			"Guinea Bissau","Liberia","Mali","Mauritania","Niger","Nigeria","Sao Tome and Principe","Senegal","Sierra Leone")
		replace `sr' = 4 if Country == "Togo"

	* Central Asia, Middle East and North Africa
		// Central Asia -> 5
		replace `sr' = 5 if inlist(Country, ///
			"Armenia","Azerbaijan","Georgia","Kazakhstan","Kyrgyzstan","Mongolia","Tajikistan","Turkmenistan","Uzbekistan")

		// Middle East and North Africa -> 6
		replace `sr' = 6 if inlist(Country, ///
			"Algeria","Bahrain","Egypt","Iran","Iraq","Jordan","Kuwait","Lebanon","Libya")
		replace `sr' = 6 if inlist(Country, ///
			"Morocco","Palestine","Oman","Qatar","Saudi Arabia","Syrian Arab Republic","Tunisia","Turkey","United Arab Emirates")

	* South Asia -> 7
		replace `sr' = 7 if inlist(Country, ///
			"Afghanistan","Bangladesh","Bhutan","India","Nepal","Pakistan")

	* East and Southeast Asia
		// East Asia -> 8
		replace `sr' = 8 if inlist(Country,"China","Hong Kong","North Korea","Taiwan")

		// Southeast Asia -> 9
		replace `sr' = 9 if inlist(Country, ///
			"Brunei","Cambodia","Indonesia","Laos","Malaysia","Maldives","Myanmar","Philippines")
		replace `sr' = 9 if inlist(Country,"Sri Lanka","Thailand","Timor Leste","Vietnam")

	* Oceania
		// Polynesia and Micronesia -> 10
		replace `sr' = 10 if inlist(Country, ///
			"American Samoa","Cook Islands","French Polynesia","Kiribati","Marshall Islands","Micronesia","Nauru","Niue")
		replace `sr' = 10 if inlist(Country, ///
			"Palau","Samoa","Tokelau","Tonga","Tuvalu","Wallis and Futuna")

		// Melanesia -> 11
		replace `sr' = 11 if inlist(Country,"Fiji","Papua New Guinea","Solomon Islands","Vanuatu")

	* High-income Asia Pacific -> 12
		replace `sr' = 12 if inlist(Country,"Japan","Singapore","South Korea")

	* Latin America and Caribbean
		// Andean Latin America -> 13
		replace `sr' = 13 if inlist(Country,"Bolivia","Ecuador","Peru")

		// Caribbean -> 14
		replace `sr' = 14 if inlist(Country, ///
			"Antigua and Barbuda","Bahamas","Barbados","Belize","Bermuda","Cuba","Dominica","Dominican Republic")
		replace `sr' = 14 if inlist(Country,"Grenada","Cayman Islands")
		replace `sr' = 14 if inlist(Country, ///
			"Guyana","Haiti","Jamaica","Puerto Rico","Saint Kitts and Nevis","Saint Lucia","St. Vincent & the Grenadines","Trinidad and Tobago")

		// Central Latin America -> 15 (Aruba is placed here)
		replace `sr' = 15 if inlist(Country, ///
			"Costa Rica","El Salvador","Guatemala","Mexico","Nicaragua","Panama","Venezuela","Aruba")

		// Southern Latin America -> 16
		replace `sr' = 16 if inlist(Country,"Argentina","Brazil","Chile","Paraguay","Uruguay")

	* High-income Western countries (27)
		// High-income English-speaking countries (6) -> 17
		* Fix: the list previously contained only "Zealand", which can never equal
		* a Country value of "New Zealand". "Zealand" is kept in case a source
		* file really uses that spelling.
		replace ncdrisc_subregioncat = 17 if inlist(Country,"Australia","Canada","Ireland","New Zealand","Zealand","England","United States of America")
		
		// North Western Europe (12) -> 18
		replace ncdrisc_subregioncat = 18 if inlist(Country,"Austria","Belgium","Denmark","Finland","Germany","Greenland","Iceland")
		* Fix: the heading counts 12 countries but only 7 were listed; the other
		* five members of the NCD-RisC grouping are added. The statement changes
		* nothing when these names do not occur in Country.
		* NOTE(review): confirm the spellings against the Country values in the data.
		replace ncdrisc_subregioncat = 18 if inlist(Country,"Luxembourg","Netherlands","Norway","Sweden","Switzerland")
				
		// South Western Europe (9) -> 19
		replace ncdrisc_subregioncat = 19 if inlist(Country,"Andorra","Cyprus","France","Greece","Israel","Italy","Malta","Portugal","Spain")
		
	* Central and Eastern Europe
		// Central Europe -> 20
		replace ncdrisc_subregioncat = 20 if inlist(Country, ///
			"Albania","Bosnia and Herzegovina","Bulgaria","Croatia","Czech Republic","Hungary","Macedonia (TFYR)","Montenegro","Poland")
		replace ncdrisc_subregioncat = 20 if inlist(Country,"Romania","Serbia","Slovakia","Slovenia")

		// Eastern Europe -> 21
		replace ncdrisc_subregioncat = 21 if inlist(Country, ///
			"Belarus","Estonia","Latvia","Lithuania","Moldova","Russian Federation","Ukraine")

	* Collapse the sub-region codes into nine super-regions.
	* The sub-region variable only ever holds whole numbers (or missing), so a
	* closed range selects exactly the codes it spans; missing stays missing.
		gen ncdrisc_regioncat = .
		replace ncdrisc_regioncat = 1 if inrange(ncdrisc_subregioncat, 1, 4)
		replace ncdrisc_regioncat = 2 if inrange(ncdrisc_subregioncat, 5, 6)
		replace ncdrisc_regioncat = 3 if ncdrisc_subregioncat == 7
		replace ncdrisc_regioncat = 4 if inrange(ncdrisc_subregioncat, 8, 9)
		replace ncdrisc_regioncat = 5 if inrange(ncdrisc_subregioncat, 10, 11)
		replace ncdrisc_regioncat = 6 if ncdrisc_subregioncat == 12
		replace ncdrisc_regioncat = 7 if inrange(ncdrisc_subregioncat, 13, 16)
		replace ncdrisc_regioncat = 8 if inrange(ncdrisc_subregioncat, 17, 19)
		replace ncdrisc_regioncat = 9 if inrange(ncdrisc_subregioncat, 20, 21)

		label define ncdrisc_regioncat_label 1 "Sub-Saharan Africa" ///
			2 "Central Asia, Middle East and North Africa" ///
			3 "South Asia" ///
			4 "East and Southeast Asia" ///
			5 "Oceania" ///
			6 "High-income Asia Pacific" ///
			7 "Latin America and Caribbean" ///
			8 "High-income Western countries" ///
			9 "Central and Eastern Europe", modify
		label variable ncdrisc_regioncat "World region (NCD RisC)"
		label values ncdrisc_regioncat ncdrisc_regioncat_label

	* The sub-region code was only a stepping stone
	drop ncdrisc_subregioncat

	* Order rows by super-region, then country, both ascending
	gsort +ncdrisc_regioncat +Country
	
/*******************************************************************************
CHECK AND UPDATE STATIN OR CHOLESTEROL-LOWERING MEDICATION USE
*******************************************************************************/
	* For the surveys below, statin is overwritten with one of the two
	* cholesterol-medication items. Every statement targets a different
	* country, so their order does not matter.

	// Surveys that use chol_med
	foreach ctry in "Argentina" "Algeria" "Cayman Islands" "Chile" "China" "Cook Islands" "Costa Rica" "Czech Republic" {
		replace statin = chol_med if Country == "`ctry'"
	}

	// Survey that uses chol_med2
	replace statin = chol_med2 if Country == "Cambodia"

	// El Salvador
	* This survey has chol_med2 but it was not cleaned, so it is rebuilt from raw
	* item p110 (see p. 106, variable p110, ENECA-2015.pdf).
		* The raw file is read in a separate frame so the main data stay loaded
		frame create merge_variables
		frame change merge_variables
		import delimited "fb135265-54e6-41d1-9914-bae2e2cf6774.csv", encoding(UTF-8) clear 
		keep p110 idencuesta
		gen p_id = idencuesta // id variable
		tostring p_id, replace
				
		* p110 == 1 -> 1, p110 == 2 -> 0, p110 == 9 -> 999999999, otherwise missing
		gen chol_med2_elsalvador = .
		replace chol_med2_elsalvador = 0 if p110 == 2
		replace chol_med2_elsalvador = 1 if p110 == 1
		replace chol_med2_elsalvador = 999999999 if p110 == 9
		
		gen Country = "El Salvador"
		save "El Salvador - chol_med- 2023_11_22.dta", replace
		
		frame change default
		frame drop merge_variables
		merge m:1 p_id Country using "El Salvador - chol_med- 2023_11_22.dta", keepusing(chol_med2_elsalvador)
		drop _merge

		* One replace is enough: it overwrites every El Salvador row, missing
		* values included (the original repeated it around a reset to missing)
		replace chol_med2 = chol_med2_elsalvador if Country == "El Salvador"
		drop chol_med2_elsalvador

		* Fix: the rebuilt item was never copied into statin, and chol_med2 is
		* dropped at the end of this section, so the work above had no effect.
		* Every other survey in this section ends with this assignment.
		* NOTE(review): confirm chol_med2 is the intended statin source for El Salvador.
		replace statin = chol_med2 if Country == "El Salvador"
		
	// England through Zanzibar: statin is overwritten with one of the two
	// cholesterol-medication items. Every statement targets a different
	// country, so their order does not matter.

	* Surveys that use chol_med.
	* Iraq: the existing statin item is a secondary prevention question only, so
	* it is fully replaced; the assignment overwrites every Iraq row, making a
	* separate reset to missing unnecessary.
	foreach ctry in "Eswatini" "India" "Indonesia" "Iraq" "Kazakhstan" "Lesotho" ///
		"Marshall Islands" "Moldova" "Mongolia" "Mozambique" "Myanmar" "Nauru" ///
		"Paraguay" "Peru" "Portugal" "Singapore" "South Korea" "Timor Leste" ///
		"Trinidad and Tobago" "Tuvalu" "Seychelles" "United States of America" ///
		"Uruguay" "Vietnam" "Zambia" {
		replace statin = chol_med if Country == "`ctry'"
	}

	* Surveys that use chol_med2
	foreach ctry in "England" "Eritrea" "Iran" "Laos" "Liberia" "Niue" "Palau" ///
		"Palestine" "Panama" "Qatar" "Rwanda" "Samoa" "Tanzania" "Togo" ///
		"Vanuatu" "Venezuela" "Zanzibar" {
		replace statin = chol_med2 if Country == "`ctry'"
	}

* Both source items are no longer needed once statin is final
drop chol_med chol_med2
	
/*******************************************************************************
GENERATE CLINICAL DIABETES VARIABLE
*******************************************************************************/
* Outlier rules. Fasting glucose values of 1000 or more are deliberately left
* in place here; the "< 800" tests further down keep them out of the outcomes.
replace hba1c_p = . if hba1c_p < 3 | hba1c_p > 17
replace fbg_new = . if fbg_new < 2.2 | (fbg_new > 33.3 & fbg_new < 1000)

    * Flag: diabetes status is taken from HbA1c alone
    gen a1c_only_country = inlist(Country, "Brazil", "England", "Germany", "Haiti", ///
        "India", "Indonesia", "Portugal", "South Africa")

    * Flag: both HbA1c and fasting glucose are used
    gen a1c_and_fbg_country = ///
        inlist(Country, "Barbados", "Chile", "China", "Costa Rica", "Czech Republic", ///
            "Greece", "Guyana", "Iran", "Kazakhstan") | ///
        inlist(Country, "Kuwait", "Mexico", "Panama", "Romania", "Seychelles", ///
            "Singapore", "South Korea", "Spain", "United States of America")

    * Assume fasting per survey protocol wherever a glucose value is present
    replace fast_new = 1 if inlist(Country, "Argentina", "Costa Rica", "Romania") & fbg_new != .

    * El Salvador: the fasting indicator is swapped (0 <-> 1)
    recode fast_new (0=1) (1=0) if Country == "El Salvador"

    * United States: only analyze data over the fasting subsample
    drop if (fast_new == . | fast_new == 0) & Country == "United States of America"

    * Pakistan: assume that all people with glucose fasted at least 8 hours,
    * according to the local survey team
    replace fast_new = 1 if !missing(fbg_new) & Country == "Pakistan"

    * Divide fbg_new by 1.10 for Liberia, Togo, Vanuatu, Zanzibar, and Cambodia
    replace fbg_new = fbg_new/1.10 if inlist(Country, "Liberia", "Togo", "Vanuatu", "Zanzibar", "Cambodia")

    * Multiply fbg_new by 1.11 for Eritrea
    replace fbg_new = fbg_new*1.11 if Country == "Eritrea"

/* The key question is the FBG <-> A1c conversion.

In the TLHL paper, this was the definition (based on Wei N, et al. Diabetes Care 2014;37:1048-51):
HbA1c <7.0% ~ FPG <8.0 mmol/l
HbA1c <8.0% ~ FPG <9.2 mmol/l
Ref: Flood D, et al. Lancet Healthy Longev 2021;2:e340-e51

The first Lancet Global Diabetes Compact paper analyzes data using the same definition as the TLHL paper:
Gregg EW, et al. Lancet 2023;401:1302-12

In the Wei paper, look at Table 2 and focus on T2D:
- A1c 6.5-6.99 has a median FBG of 139, and A1c 7.0-7.49 has a FBG of 147. The midpoint of these is ~143 mg/dl, which is 7.9 mmol/l; 8.0 is very close to this.
- Similarly, A1c 7.5-7.99 has a median FBG of 157, and A1c 8.0-8.49 has a FBG of 179. The midpoint of these is ~168 mg/dl, which is 9.3 mmol/l; 9.2 is very close to this. */

    * Glucose-based definition outside the A1c-only countries: fasting glucose
    * of 7.0 or more counts as diabetes. Values of 800 or more are ignored.
    * NOTE(review): fast_new >= 1 is also true when fast_new is missing, because
    * missing sorts above every number - confirm that this is intended.
    gen long clin_dia2 = (fbg_new >= 7.0) if fbg_new < 800 & fast_new >= 1 & a1c_only_country == 0

        * Countries with only a1c available: HbA1c of 6.5 or more counts as diabetes
        replace clin_dia2 = (hba1c_p >= 6.5) if hba1c_p < 800 & a1c_only_country == 1

        * Use of glucose-lowering medication counts as diabetes in every country
        replace clin_dia2 = 1 if dia_med_new == 1 | insulin_new == 1

        * Ensure missing if no diabetes biomarker
        replace clin_dia2 = . if missing(hba1c_p) & missing(fbg_new)

/*******************************************************************************
CHECK SURVEY DESIGN VARIABLES
*******************************************************************************/
* Unusable weights become missing: Haiti's 555555555 code, zero, and anything
* above 6666666
replace w3 = . if (Country == "Haiti" & w3 == 555555555) | w3 == 0 | w3 > 6666666

* India: the weight is multiplied by 100 for rows with sex == 1
* NOTE(review): the reason for rescaling one sex only is not visible in this
* file - confirm it is intended.
replace w3 = w3*100 if Country == "India" & sex == 1

******************************************************
* generate psu_num and stratum_num for Armenia and Aruba
******************************************************
* For each of the two countries the string psu / stratum value is prefixed
* ("AAA" for Armenia, "AAB" for Aruba), encoded to integers, and added to a
* country-specific base (1001000000 / 1002000000) to form the numeric id.
gen str30 psu_abc = ""
gen long psu2 = .

gen str30 stratum_abc = ""
gen long stratum2 = .

* Fix: ids of the form 1001000000 + k are not exactly representable in float
* storage, and replace does not promote float to double, so neighbouring ids
* could collapse into one value. Holding both variables as double rules that
* out; the existing values are unchanged.
recast double psu_num stratum_num

* 1) Handle Armenia
* --- PSU ---
replace psu_abc = "AAA" + psu if Country=="Armenia"
encode psu_abc if Country=="Armenia", gen(temp_psu2)
replace psu2 = temp_psu2 if Country=="Armenia"
drop temp_psu2
replace psu_num = 1001000000 + psu2 if Country=="Armenia"

* --- Stratum ---
replace stratum_abc = "AAA" + stratum if Country=="Armenia"
encode stratum_abc if Country=="Armenia", gen(temp_str2)
replace stratum2 = temp_str2 if Country=="Armenia"
drop temp_str2
replace stratum_num = 1001000000 + stratum2 if Country=="Armenia"

* 2) Handle Aruba
* --- PSU ---
* temp_psu2 is reused as the encode target; the codes only need to be
* distinct within a country
replace psu_abc = "AAB" + psu if Country=="Aruba"
encode psu_abc if Country=="Aruba", gen(temp_psu2)
replace psu2 = temp_psu2 if Country=="Aruba"
drop temp_psu2
replace psu_num = 1002000000 + psu2 if Country=="Aruba"

* --- Stratum ---
replace stratum_abc = "AAB" + stratum if Country == "Aruba"
encode stratum_abc if Country == "Aruba", gen(temp_stratum2)
replace stratum2 = temp_stratum2 if Country == "Aruba"
drop temp_stratum2
replace stratum_num = 1002000000 + stratum2 if Country == "Aruba"

////////////////////////////////////////////////////////////////////////////////
/////////////////////////// SUBPOPULATION VARIABLES ////////////////////////////
////////////////////////////////////////////////////////////////////////////////

* Analysis sample: weight, PSU, stratum and diabetes status all present
* (missing() with several arguments is true when any of them is missing)
keep if !missing(w3, psu_num, stratum_num, clin_dia2)

* Shared building blocks for the subpopulation flags:
* not recorded as pregnant, with sex, BMI category and education all present
local covars_ok pregnant != 1 & !missing(sex, bmi_cat4, educat3)
* people with diabetes in the 30-69 and 40-69 age ranges
local dm_30_69 clin_dia2 == 1 & age >= 30 & age < 70 & `covars_ok'
local dm_40_69 clin_dia2 == 1 & age >= 40 & age < 70 & `covars_ok'

* Primary outcomes
* 1. % diagnosed diabetes: everyone with diabetes aged 30-69
gen byte sample_dm_diag = (`dm_30_69')

* 2-4. Flags restricted to diagnosed diabetes (hbg_new == 1). They are left
* missing where hbg_new is missing and are 0/1 elsewhere.
* 2. % glycemic control
gen byte sample_dm_control_diag = (hbg_new == 1 & `dm_30_69') if !missing(hbg_new)

* 3. % blood pressure control
gen byte sample_bp_control_diag = (hbg_new == 1 & `dm_30_69') if !missing(hbg_new)

* 4. % statin use (ages 40-69)
gen byte sample_statin_dm_diag = (hbg_new == 1 & `dm_40_69') if !missing(hbg_new)

* Subpop variables for sensitivity analyses: denominator = all with diabetes
* 2. % glycemic control
gen byte sample_dm_control = (`dm_30_69')

* 3. % blood pressure control
gen byte sample_bp_control = (`dm_30_69')

* 4. % statin use (ages 40-69)
gen byte sample_statin_dm = (`dm_40_69')

/*******************************************************************************
GENERATE MAIN ANALYSIS OUTCOMES
*******************************************************************************/
* 1. % diagnosed diabetes: copy of hbg_new with the -1 code blanked
    clonevar dm_diag = hbg_new
    replace dm_diag = . if hbg_new == -1

* 2. % glycemic control
    * Glucose-based rule outside the A1c-only countries: controlled when
    * fasting glucose is below 9.2. Values of 800 or more are ignored.
    gen dm_control = (fbg_new < 9.2) if fbg_new < 800 & fast_new >= 1 & a1c_only_country == 0

    * A1c-based rule: controlled when HbA1c is below 8. It applies to the
    * A1c-only countries and, where an HbA1c value exists, overrides the
    * glucose-based result in the countries that have both measures.
    replace dm_control = (hba1c_p < 8) if hba1c_p < 800 & ///
        (a1c_only_country == 1 | a1c_and_fbg_country == 1)

* 3. % blood pressure control
    * Outliers are blanked first
    replace sbp_avg = . if sbp_avg < 70 | sbp_avg > 240
    replace dbp_avg = . if dbp_avg < 40 | dbp_avg > 130

    * Uncontrolled when either reading is at or above its threshold (140 / 90);
    * controlled only when both readings are below
    gen bp_control = .
    replace bp_control = 0 if inrange(sbp_avg, 140, 300) | inrange(dbp_avg, 90, 300)
    replace bp_control = 1 if sbp_avg < 140 & dbp_avg < 90

* 4. % statin use: copy of statin with anything above 1 blanked
    clonevar statin_dm = statin
    replace statin_dm = . if statin_dm > 1

/*******************************************************************************
Sensitivity analysis for glycemic control <7.0%
*******************************************************************************/
    * Glucose-based rule outside the A1c-only countries: controlled when
    * fasting glucose is below 8. Values of 800 or more are ignored.
    gen dm_control2 = (fbg_new < 8) if fbg_new < 800 & fast_new >= 1 & a1c_only_country == 0

    * A1c-based rule: controlled when HbA1c is below 7. It applies to the
    * A1c-only countries and, where an HbA1c value exists, overrides the
    * glucose-based result in the countries that have both measures.
    replace dm_control2 = (hba1c_p < 7) if hba1c_p < 800 & ///
        (a1c_only_country == 1 | a1c_and_fbg_country == 1)

/*******************************************************************************
Sensitivity analysis for glycemic control using the predicted A1c or real A1c values
*******************************************************************************/
* Fix applied throughout this section: the country name is held in Country
* (capital C). No variable called country exists, so with variable
* abbreviation on the old spelling resolved to the numeric countryGDPclass.
* That made the string comparisons fail and grouped the mixed model by the
* wrong variable.

* fbg_venous: 1 for the surveys listed below, 0 for all others, missing where
* fbg_new is missing
gen fbg_venous = 0
replace fbg_venous = 1 if ///
    inlist(Country, "Bangladesh", "Barbados", "Brunei", "Chile", "China", ///
        "Costa Rica", "Czech Republic", "El Salvador", "Germany") | ///
    inlist(Country, "Greece", "Guyana", "Iran", "Iraq", "Kazakhstan", ///
        "Kuwait", "Lebanon", "Mexico", "Pakistan") | ///
    inlist(Country, "Palestine", "Panama", "Peru", "Romania", "Singapore", ///
        "South Korea", "Spain", "United States of America", "Venezuela")
replace fbg_venous = . if fbg_new == .

* Fit the model on the countries that have both HbA1c and fasting glucose,
* with a random intercept for each country
mixed hba1c_p i.sex i.age_cat4 i.ncdrisc_regioncat i.bmi_cat4 fbg_new i.ncdrisc_regioncat#c.fbg_new fbg_venous if a1c_and_fbg_country == 1 || Country:

* Generate predicted values (fixed-portion linear prediction) for each observation
predict hba1c_p_hat, xb

* Measured A1c where available, otherwise the prediction
generate hba1c_new = hba1c_p if !missing(hba1c_p)
replace hba1c_new = hba1c_p_hat if missing(hba1c_p)

* % glycemic control: controlled when hba1c_new is below 8; missing when
* hba1c_new is missing
    gen dm_control3 = .
    replace dm_control3 = 0 if hba1c_new >= 8 & hba1c_new < 800
    replace dm_control3 = 1 if hba1c_new < 8

sort Country

* Today's date as YYYYMMDD; not used in the lines shown here
local today : display %tdCCYYNNDD date(c(current_date), "DMY")

* steps: 0 for the surveys listed below, 1 for all others
gen steps = 1
replace steps = 0 if ///
    inlist(Country, "Argentina", "Barbados", "Brazil", "Chile", "China", ///
        "Czech Republic", "El Salvador", "England", "Germany") | ///
    inlist(Country, "Greece", "Haiti", "India", "Indonesia", "Malta", ///
        "Marshall Islands", "Mexico", "Namibia", "Pakistan") | ///
    inlist(Country, "Panama", "Peru", "Portugal", "Romania", "Seychelles", ///
        "Singapore", "South Africa", "South Korea", "Spain") | ///
    inlist(Country, "United States of America", "Venezuela")

save "HPACC_Maindata_appended.dta", replace
 
* Load and clean World Population Prospects (WPP) estimates; only rows for 2021 are kept
* Please find link to download here: https://population.un.org/wpp/downloads?folder=Standard%20Projections&group=Population
clear

* NOTE(review): the workbook path is machine-specific and the file name refers
* to the WPP 2024 revision; adjust the path when running elsewhere.
import excel "C:\Users\grace\OneDrive\Diabetes\WPP2024_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", sheet("Estimates")

* Discard the first 16 spreadsheet rows and the columns that are not used below
drop in 1/16
drop A B D E F G H I J

* Column C becomes the location name; columns L through AF become the
* 5-year age-band counts
rename (C L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF) ///
	(Country age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34 ///
	age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 ///
	age70_74 age75_79 age80_84 age85_89 age90_94 age95_99 age100_plus)

* Keep the rows whose column K equals "2021", then drop the first of them.
* NOTE(review): the first 2021 row is presumably a global aggregate -- confirm
* against the workbook, since this depends on the sheet's row order.
keep if K == "2021"
drop in 1
drop K

// Age-band variables, listed once and reused for every step below
local agevars age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34 ///
	age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 ///
	age70_74 age75_79 age80_84 age85_89 age90_94 age95_99 age100_plus

// Convert the imported strings to numbers; with -force- any non-numeric cell
// becomes missing instead of stopping the conversion
destring `agevars', replace force

// Multiply each band by 1000
// NOTE(review): presumably the workbook reports thousands of persons -- confirm
foreach band of local agevars {
	replace `band' = `band' * 1000
}

// Population totals for ages 30-69 and 40-69 (rowtotal counts missing bands as 0)
egen pop_30_69 = rowtotal(age30_34 age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69)
egen pop_40_69 = rowtotal(age40_44 age45_49 age50_54 age55_59 age60_64 age65_69)

format pop_30_69 pop_40_69 %12.0f

// Only the two totals are carried forward
drop `agevars'

* Rename WPP locations to the spellings used for Country in this file
replace Country = "Brunei"            if Country == "Brunei Darussalam"
replace Country = "Czech Republic"    if Country == "Czechia"
replace Country = "Iran"              if Country == "Iran (Islamic Republic of)"
replace Country = "Laos"              if Country == "Lao People's Democratic Republic"
replace Country = "Moldova"           if Country == "Republic of Moldova"
replace Country = "Palestine"         if Country == "State of Palestine"
replace Country = "South Korea"       if Country == "Republic of Korea"
replace Country = "Timor Leste"       if Country == "Timor-Leste"
replace Country = "Venezuela"         if Country == "Venezuela (Bolivarian Republic of)"
replace Country = "Vietnam"           if Country == "Viet Nam"
replace Country = "Wallis and Futuna" if Country == "Wallis and Futuna Islands"

* Drop rows that are not merged as-is:
*  - the two regional aggregates;
*  - United Republic of Tanzania: populations for Tanzania and Zanzibar are
*    filled in by hand after the merge further down in this file;
*  - United Kingdom: the population for England is likewise filled in by hand
*    after the merge.
drop if inlist(Country, "Australia/New Zealand", "Latin America and the Caribbean", ///
	"United Republic of Tanzania", "United Kingdom")

sort Country
save "WPP_POP_2021.dta", replace

* Merge the WPP 2021 population totals into the appended dataset.
* keep(master match) discards WPP rows with no matching survey record and
* nogenerate suppresses the _merge variable.
* NOTE(review): this merge keys on Country, while the statements that build
* this dataset refer to country (lowercase) and Country is only renamed to
* country later on -- confirm that both names resolve as intended.
use "HPACC_Maindata_appended.dta", clear
merge m:1 Country using "WPP_POP_2021.dta", keep(master match) nogenerate

* Utilize the WPP estimate for the United Republic of Tanzania (pop_30_69 = 17013810; pop_40_69 = 9585849) and a derived estimate from the Basic Demographic and Socio-Economic Profile Report, published by Tanzania's National Bureau of Statistics, that Zanzibar made up approximately 3.1% of Tanzania's total population in 2021 (https://sensa.nbs.go.tz/publication/02.%20Mainland_Demographic_and_Socioeconomic_Profile.pdf)
local tza_pop_30_69 17013810
local tza_pop_40_69 9585849

* Tanzania receives 96.9% of the union totals
replace pop_30_69 = `tza_pop_30_69' * 0.969 if Country == "Tanzania"
replace pop_40_69 = `tza_pop_40_69' * 0.969 if Country == "Tanzania"

* Zanzibar receives the remaining 3.1%
replace pop_30_69 = `tza_pop_30_69' * 0.031 if Country == "Zanzibar"
replace pop_40_69 = `tza_pop_40_69' * 0.031 if Country == "Zanzibar"

* Utilize the WPP estimate for the United Kingdom (pop_30_69 = 33914452; pop_40_69 = 24895042) and a derived estimate from an Office for National Statistics report that England made up about 84.3% of the total population of the United Kingdom in 2021 (https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/articles/overviewoftheukpopulation/january2021)
local uk_pop_30_69 33914452
local uk_pop_40_69 24895042

replace pop_30_69 = `uk_pop_30_69' * 0.843 if Country == "England"
replace pop_40_69 = `uk_pop_40_69' * 0.843 if Country == "England"

/***************************************************************************************/
* Stata code Converted from R code: Rescaling Procedure
/***************************************************************************************/
* WHO 5-year band percentages for ages 30-69, stored as constant columns
* t_age30_34 ... t_age65_69 (the eight values sum to 43.99)
local who_bands 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69
local who_pcts  7.61  7.15  6.59  6.04  5.37  4.55  3.72  2.96

forvalues i = 1/8 {
	local band : word `i' of `who_bands'
	local pct  : word `i' of `who_pcts'
	gen t_age`band' = `pct'
}

* Assign each record to a 5-year age band ("A30 - 34" ... "A65 - 69").
* Records with age outside 30-69, or with missing age, keep an empty string.
gen str10 age_group = ""
forvalues lo = 30(5)65 {
	local hi = `lo' + 4
	replace age_group = "A`lo' - `hi'" if age >= `lo' & age < `lo' + 5
}

* Numeric version of the band. The value label is defined BEFORE encoding and
* handed to -encode- through label(), so each band always receives the code
* listed here. Encoding first and attaching the label afterwards is only
* correct when every band occurs in the data, because -encode- otherwise
* numbers the bands that are present consecutively and the codes no longer
* line up with this label. Empty age_group strings are encoded as missing.
label define agegrp_lbl 1 "A30 - 34" 2 "A35 - 39" 3 "A40 - 44" 4 "A45 - 49" ///
                        5 "A50 - 54" 6 "A55 - 59" 7 "A60 - 64" 8 "A65 - 69"
encode age_group, gen(age_group_fac) label(agegrp_lbl)

* WHO   = WHO percentage of the record's own 5-year band (bands 30-34 to 65-69)
* WHO_2 = the same value, but only for the bands 40-44 to 65-69
* Both stay missing for records without an age band.
gen WHO = .
gen WHO_2 = .
forvalues lo = 30(5)65 {
	local hi = `lo' + 4
	replace WHO = t_age`lo'_`hi' if age_group == "A`lo' - `hi'"
	if `lo' >= 40 {
		replace WHO_2 = t_age`lo'_`hi' if age_group == "A`lo' - `hi'"
	}
}

* The statements that follow refer to the country identifier in lowercase
rename Country country

////////////////////////////////////////////////////////////////////////////////
///////////////////////////// MAIN ANALYSIS USING POPULATION WEIGHTS ///////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************

-	We are generating population weights for the 30-69 population even if a country
	has incomplete age ranges in the survey. For example, even in a STEPS survey
	missing the 65-69 age range (because it sampled only to age 64), we still 
	are using the country's entire population across the 30-69 range.
	
- 	Age standardization is tricky and imperfect when there are missing age ranges
	in the underlying surveys. We can only age standardize to the available
	population age ranges. We need to be careful and not just apply WHO standardization
	but instead do an adjustment procedure to maintain the notion that the weights
	are rescaled to the population of each country 30-69 years old.
	
- 	If we simply rescale to the WHO 5-year band % without the adjustment,
	this would undercount a country like India which only has people 45 years and above.  
	In India, we only have 60-64 and 65-69, so only the 3.72% and 2.96% categories, respectively.
	This is only ~6.68% of the total population, or 6.68/43.99 = 15.19% of the 
	Indian population ages 30-69 when you sum the WHO 5-year bands from 30-69. 
	(The 43.99 number comes from the sum of WHO 5-year bands across this whole
	age range)
	
-  	Note that we rescale weights to 0 for people not in the subpopulation
	of interest so they can contribute to the variance estimate but not impact the 
	weighted estimates themselves.

******************************************************************************/
***** CRUDE WEIGHTS
* 30-69: within each country, total w3 over records with sample_dm_diag == 1,
* then rescale w3 so the in-sample weights add up to the country's 30-69
* population. Records outside the sample have a missing total and therefore a
* missing crude weight at this point.
bysort country: egen sum_wpop_sample_w3 = total(w3) if sample_dm_diag == 1
gen w3_crude_30_69 = w3 * pop_30_69 / sum_wpop_sample_w3

* 40-69: same construction, using records with sample_statin_dm == 1 and the
* country's 40-69 population
bysort country: egen sum_wpop_sample_w3_2 = total(w3) if sample_statin_dm == 1
gen w3_crude_40_69 = w3 * pop_40_69 / sum_wpop_sample_w3_2
	
***** AGE-STANDARDIZED WEIGHTS
* Step 1: "country_portion" = share of the country's 30-69 population that the
* crude weights place in each 5-year age band (missing crude weights are
* ignored by total())
generate country_portion = .
by country age_group_fac, sort: egen sum_scaled = total(w3_crude_30_69)
replace country_portion = sum_scaled / pop_30_69

* Same share for the 40-69 weights and population
generate country_portion_2 = .
by country age_group_fac, sort: egen sum_scaled_2 = total(w3_crude_40_69)
replace country_portion_2 = sum_scaled_2 / pop_40_69

* Step 2: Sum the total WHO percentages from 30-69 years by country, accounting for each country's specific age coverage
	* Default: all eight bands from 30-34 through 65-69
	gen sum_WHO_30_69 = t_age30_34 + t_age35_39 + t_age40_44 + t_age45_49 + ///
						t_age50_54 + t_age55_59 + t_age60_64 + t_age65_69

	* Reusable band sums (expanded as text inside the expressions below)
	local who_30_59 t_age30_34 + t_age35_39 + t_age40_44 + t_age45_49 + t_age50_54 + t_age55_59
	local who_30_64 `who_30_59' + t_age60_64

	* Countries with age range 30-64: the 65-69 band is left out
	foreach ctry in "Burkina Faso" "Cambodia" "Comoros" "Cook Islands" "Fiji" ///
			"Kyrgyzstan" "Lesotho" "Liberia" "Mozambique" "Myanmar" "Palau" ///
			"Palestine" "Qatar" "Rwanda" "Samoa" "Seychelles" "Tanzania" ///
			"Togo" "Trinidad and Tobago" "Uruguay" "Vanuatu" "Zanzibar" {
		replace sum_WHO_30_69 = `who_30_64' if country == "`ctry'"
	}

	* Czech Republic (age range 30-65): 1/5 of the 65-69 band is added to
	* represent age 65 only.
	* NOTE(review): the per-person WHO value for the 65-69 band is still the
	* full t_age65_69, so the WHO portions for this country add up to more
	* than 1 -- confirm this is intended.
	replace sum_WHO_30_69 = `who_30_64' + 0.2*t_age65_69 if country == "Czech Republic"

	* Laos: all bands through 55-59 plus 3/5 of the 60-64 band.
	* NOTE(review): 0.6 corresponds to three single years of age (60-62); the
	* earlier wording said ages 60-63, which would be 0.8 -- confirm the Laos
	* age range. The same caveat about the full-band per-person WHO value
	* applies as for the Czech Republic.
	replace sum_WHO_30_69 = `who_30_59' + 0.6*t_age60_64 if country == "Laos"

	* Haiti and Namibia (age range 35-64): the 30-34 and 65-69 bands are left out
	foreach ctry in "Haiti" "Namibia" {
		replace sum_WHO_30_69 = t_age35_39 + t_age40_44 + t_age45_49 + ///
							   t_age50_54 + t_age55_59 + t_age60_64 if country == "`ctry'"
	}

	* China (age range 45-69): the bands below 45 are left out
	replace sum_WHO_30_69 = t_age45_49 + t_age50_54 + t_age55_59 + t_age60_64 + t_age65_69 if country == "China"

	* India (age range 60-69): only the two oldest bands
	replace sum_WHO_30_69 = t_age60_64 + t_age65_69 if country == "India"

	* Peru (age range 30-59): the 60-64 and 65-69 bands are left out
	replace sum_WHO_30_69 = `who_30_59' if country == "Peru"

	* Sum the total WHO percentages from 40-69 years by country, accounting for each country's specific age coverage
	* Default: all six bands from 40-44 through 65-69
	gen sum_WHO_40_69 = t_age40_44 + t_age45_49 + t_age50_54 + t_age55_59 + t_age60_64 + t_age65_69

	* Reusable band sums (expanded as text inside the expressions below)
	local who_40_59 t_age40_44 + t_age45_49 + t_age50_54 + t_age55_59
	local who_40_64 `who_40_59' + t_age60_64

	* Countries whose coverage stops at 64: the 65-69 band is left out
	foreach ctry in "Burkina Faso" "Cambodia" "Comoros" "Cook Islands" "Fiji" ///
			"Kyrgyzstan" "Lesotho" "Liberia" "Mozambique" "Myanmar" "Palau" ///
			"Palestine" "Qatar" "Rwanda" "Samoa" "Seychelles" "Tanzania" ///
			"Togo" "Trinidad and Tobago" "Uruguay" "Vanuatu" "Zanzibar" {
		replace sum_WHO_40_69 = `who_40_64' if country == "`ctry'"
	}

	* Czech Republic (coverage ends at age 65): 1/5 of the 65-69 band is added
	* to represent age 65 only.
	* NOTE(review): the per-person WHO_2 value for the 65-69 band is still the
	* full t_age65_69 -- confirm this is intended.
	replace sum_WHO_40_69 = `who_40_64' + 0.2*t_age65_69 if country == "Czech Republic"

	* Laos: all bands through 55-59 plus 3/5 of the 60-64 band.
	* NOTE(review): 0.6 corresponds to ages 60-62; the earlier wording said
	* ages 60-63, which would be 0.8 -- confirm the Laos age range.
	replace sum_WHO_40_69 = `who_40_59' + 0.6*t_age60_64 if country == "Laos"

	* Haiti and Namibia (age range 35-64): within 40-69 only the 65-69 band is left out
	foreach ctry in "Haiti" "Namibia" {
		replace sum_WHO_40_69 = `who_40_64' if country == "`ctry'"
	}

	* China (age range 45-69): the 40-44 band is left out
	replace sum_WHO_40_69 = t_age45_49 + t_age50_54 + t_age55_59 + t_age60_64 + t_age65_69 if country == "China"

	* India (age range 60-69): only the two oldest bands
	replace sum_WHO_40_69 = t_age60_64 + t_age65_69 if country == "India"

	* Peru (coverage ends at age 59): the 60-64 and 65-69 bands are left out
	replace sum_WHO_40_69 = `who_40_59' if country == "Peru"
						   
* Step 3: WHO portion for each age band
	* Share of the WHO standard that the record's band represents within the
	* country's covered age range (30-69 version, then 40-69 version)
	gen WHO_portion = WHO / sum_WHO_30_69

	gen WHO_portion_2 = WHO_2 / sum_WHO_40_69

* Step 4: Adjustment factor
	* Ratio of the WHO standard share to the share the crude weights give the
	* band. A value above 1 raises the weight and a value below 1 lowers it, so
	* that within each country the weighted age distribution follows the WHO
	* standard.
gen adj_factor = WHO_portion / country_portion

gen adj_factor_2 = WHO_portion_2 / country_portion_2

* Step 5: WHO age-standardized weights = crude weight times adjustment factor
gen w3_who_30_69 = w3_crude_30_69 * adj_factor

gen w3_who_40_69 = w3_crude_40_69 * adj_factor_2

***** FINAL TINKERING OF WEIGHTS
* Express all four weights in units of 1000 persons to facilitate computation
foreach wt of varlist w3_crude_30_69 w3_who_30_69 w3_crude_40_69 w3_who_40_69 {
	replace `wt' = `wt' / 1000
}

* Set weights to 0 for records flagged as out of sample, so that they still
* count toward variance estimation without affecting the weighted estimates.
* Only records whose sample flag equals 0 are changed; a record with a
* missing flag keeps its missing weight.
foreach wt of varlist w3_crude_30_69 w3_who_30_69 {
	replace `wt' = 0 if sample_dm_diag == 0
}

foreach wt of varlist w3_crude_40_69 w3_who_40_69 {
	replace `wt' = 0 if sample_statin_dm == 0
}

* Overwrite the appended dataset with the weights added
save "HPACC_Maindata_appended.dta", replace